// This file is part of Notepad4.
// See License.txt for details about distribution and modification.
//! Lexer for Lua

#include <cassert>
#include <cstring>

#include <string>
#include <string_view>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "StringUtils.h"
#include "LexerModule.h"

using namespace Lexilla;

namespace {

//KeywordIndex++Autogenerated -- start of section automatically generated
enum {
	// index into keywordLists of the words styled as SCE_LUA_WORD
	KeywordIndex_Keyword = 0,
	// index into keywordLists of the words styled as SCE_LUA_BUILTIN_FUNC
	KeywordIndex_BasicFunction = 1,
	// index into keywordLists of the words styled as SCE_LUA_METAMETHOD
	KeywordIndex_Metamethod = 2,
	// size of the buffer ColouriseLuaDoc uses to copy an identifier for lookup
	MaxKeywordSize = 20,
};
//KeywordIndex--Autogenerated -- end of section automatically generated

// Tracks progress through a backslash escape sequence inside a quoted string.
struct EscapeSequence {
	// string style to return to once the escape sequence ends
	int outerState = SCE_LUA_DEFAULT;
	// number of atEscapeEnd() calls left, including the one that terminates the sequence
	int digitsLeft = 0;
	// true when the characters after the introducer are checked as hexadecimal digits
	bool hex = false;
	// true for the braced form \u{...}; set by the caller after it has seen "u{"
	bool brace = false;

	// Begins a new escape sequence. `state` is the enclosing string style and
	// `chNext` is the character following the backslash.
	// highlight any character as escape sequence.
	void resetEscapeState(int state, int chNext) noexcept {
		outerState = state;
		digitsLeft = 1;
		hex = true;
		brace = false;
		if (chNext == 'x') {
			// \xXX: 'x' is consumed by the caller, leaving two hex digits
			// plus the call that ends the sequence.
			digitsLeft = 3;
		} else if (chNext == 'u') {
			// \u{XXX}: the caller consumes 'u' and '{'. The value must be
			// less than 2^31, i.e. up to eight hex digits, plus one call
			// that sees the closing '}'. A budget of 8 would stop on the
			// eighth digit and leave it and the brace in string style.
			digitsLeft = 9;
		} else if (IsADigit(chNext)) {
			// \ddd: the first digit is consumed by the caller, so this
			// covers two more decimal digits plus the terminating call.
			digitsLeft = 3;
			hex = false;
		}
	}
	// Consumes one position of the budget and reports whether the escape
	// sequence ends at `ch`: either the budget is used up or `ch` is not a
	// digit of the expected radix.
	bool atEscapeEnd(int ch) noexcept {
		--digitsLeft;
		return digitsLeft <= 0 || !IsDecimalOrHex(ch, hex);
	}
};

// Tests for a Lua long bracket whose first bracket character is at `pos`.
// With `start` set, matches an opening "[", any number of '=', "[" and stores
// the number of '=' in `delimiterCount`. Otherwise matches a closing bracket
// whose number of '=' equals `delimiterCount` (which is left untouched).
bool IsLongBracket(LexAccessor &styler, Sci_PositionU pos, bool start, int &delimiterCount) noexcept {
	// step over the first bracket, then count the run of '='
	int level = 0;
	char ch = styler[++pos];
	for (; ch == '='; ch = styler[++pos]) {
		++level;
	}

	if (!start) {
		return ch == ']' && level == delimiterCount;
	}
	if (ch != '[') {
		return false;
	}
	delimiterCount = level;
	return true;
}

// https://en.cppreference.com/w/c/io/fprintf
// 6.4 String Manipulation https://www.lua.org/manual/5.4/manual.html#6.4
// Returns true when `ch` is accepted as the final (type) character of a
// '%' sequence highlighted inside strings.
constexpr bool IsFormatSpecifier(char ch) noexcept {
	return AnyOf(ch, 'a', 'A',
					'c',
					'd',
					'e', 'E',
					'f', 'F',
					'g', 'G',
					'i',
					'l',
					'o',
					'p', // Lua 5.4 string.format pointer conversion
					'P', // kept so that "%P" stays highlighted as before
					'q',
					's',
					'u',
					'w',
					'x', 'X');
}

// Measures a printf-like format specifier that begins at the current
// position of `sc` (the caller only invokes this when sc.ch is '%').
// Returns the length of the specifier including the '%', or 0 when the
// text is not treated as a specifier.
inline Sci_Position CheckFormatSpecifier(const StyleContext &sc, LexAccessor &styler, bool insideUrl) noexcept {
	// "%%" is always a two character specifier
	if (sc.chNext == '%') {
		return 2;
	}
	// Not a specifier: percent encoded URL string, or a percentage followed
	// by a word such as "5% x".
	if ((insideUrl && IsHexDigit(sc.chNext)) || (IsASpaceOrTab(sc.chNext) && IsADigit(sc.chPrev))) {
		return 0;
	}

	Sci_PositionU cursor = sc.currentPos + 1;
	char ch = styler[cursor];
	// flags
	for (; AnyOf(ch, '-', '+', ' ', '0', '#'); ch = styler[++cursor]) {
	}
	// [width]
	for (; IsADigit(ch); ch = styler[++cursor]) {
	}
	// [.precision]
	if (ch == '.') {
		do {
			ch = styler[++cursor];
		} while (IsADigit(ch));
	}
	// [type]
	if (!IsFormatSpecifier(ch)) {
		return 0;
	}
	// length counts the type character as well
	return cursor + 1 - sc.currentPos;
}

// Called for each position inside a string. Maintains the `insideUrl` flag
// and, when a format specifier starts at the current position, styles it as
// SCE_LUA_FORMAT_SPECIFIER, moves past it and restores the string style.
// Returns true only when a specifier was styled, in which case `sc` has
// already been advanced.
bool HighlightFormatSpecifier(StyleContext &sc, bool &insideUrl) {
	// "://" after a lower case letter marks the start of a URL
	if (sc.Match(':', '/', '/') && IsLowerCase(sc.chPrev)) {
		insideUrl = true;
		return false;
	}
	// a character that cannot be part of a URL ends it
	if (insideUrl && IsInvalidUrlChar(sc.ch)) {
		insideUrl = false;
		return false;
	}
	if (sc.ch != '%') {
		return false;
	}

	const Sci_Position length = CheckFormatSpecifier(sc, sc.styler, insideUrl);
	if (length == 0) {
		return false;
	}
	const int outerState = sc.state;
	sc.SetState(SCE_LUA_FORMAT_SPECIFIER);
	sc.Advance(length);
	sc.SetState(outerState);
	return true;
}

// Flags kept in the low bits of the per-line state written by ColouriseLuaDoc;
// the long bracket level is stored in the same value shifted left by 8.
enum {
	// set when a "--" line comment starts before any visible character on the
	// line, and for the "#!" line at the start of the document; read by FoldLuaDoc
	LuaLineStateLineComment = 1,
	// set when a quoted string has a backslash directly before the end of line
	LuaLineStateLineContinuation = 1 << 1,
	// set after a "\z" escape, until the next non-space character of the string
	LuaLineStateSkipwhitespace = 1 << 2,
};

// Context left by the previous keyword or operator; ColouriseLuaDoc uses it
// to decide how the identifier that follows is styled.
enum class KeywordType {
	// no pending context
	None = SCE_LUA_DEFAULT,
	Attribute = SCE_LUA_ATTRIBUTE,	// < name >
	// set after the "function" keyword
	Function = SCE_LUA_FUNCTION_DEFINITION,
	Goto = SCE_LUA_LABEL,	// goto name
	// no explicit value: takes the value following Goto
	Colon,					// :: name ::
};

// Styles the part of a Lua document described by startPos and lengthDoc.
// initStyle is the style in effect at startPos, keywordLists holds the word
// lists indexed by KeywordIndex_*, and styler gives access to the document.
// For every line a state value is saved: the low bits hold LuaLineState*
// flags and the bits from 8 up hold the '=' count of an open long bracket.
void ColouriseLuaDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList keywordLists, Accessor &styler) {
	int lineState = 0;
	// number of '=' in the long bracket that opened the current comment or string
	int delimiterCount = 0;
	// non-zero once a non-space character has been seen on the current line
	int visibleChars = 0;
	bool insideUrl = false;
	KeywordType kwType = KeywordType::None;
	EscapeSequence escSeq;

	StyleContext sc(startPos, lengthDoc, initStyle, styler);
	if (sc.currentLine > 0) {
		// resume from the state saved at the end of the previous line
		lineState = styler.GetLineState(sc.currentLine - 1);
		delimiterCount = lineState >> 8;
		// only the string continuation flags carry over to the next line
		lineState &= LuaLineStateLineContinuation | LuaLineStateSkipwhitespace;
	}

	if (startPos == 0 && sc.Match('#', '!')) {
		// shebang line at the very start of the document: styled as a line comment
		lineState = LuaLineStateLineComment;
		sc.SetState(SCE_LUA_COMMENTLINE);
		sc.Forward();
	}

	while (sc.More()) {
		// first decide whether the current state ends at this position
		switch (sc.state) {
		case SCE_LUA_OPERATOR:
			// the operator style is left again at the next position
			sc.SetState(SCE_LUA_DEFAULT);
			break;

		case SCE_LUA_NUMBER:
			if (!IsDecimalNumberEx(sc.chPrev, sc.ch, sc.chNext)) {
				sc.SetState(SCE_LUA_DEFAULT);
			}
			break;

		case SCE_LUA_COMMENTLINE:
			// a line comment runs until the start of the next line
			if (sc.atLineStart) {
				sc.SetState(SCE_LUA_DEFAULT);
			}
			break;

		case SCE_LUA_IDENTIFIER:
			if (!IsIdentifierCharEx(sc.ch)) {
				// the identifier is complete: look it up and pick its final style
				char s[MaxKeywordSize];
				sc.GetCurrent(s, sizeof(s));
				if (keywordLists[KeywordIndex_Keyword].InList(s)) {
					sc.ChangeState(SCE_LUA_WORD);
					// remember "function" and "goto" so the name that follows
					// can be styled as a function definition or a label
					kwType = KeywordType::None;
					if (StrEqual(s, "function")) {
						kwType = KeywordType::Function;
					} else if (StrEqual(s, "goto")) {
						kwType = KeywordType::Goto;
					}
				} else if (keywordLists[KeywordIndex_BasicFunction].InListPrefixed(s, '(')) {
					sc.ChangeState(SCE_LUA_BUILTIN_FUNC);
				} else if (keywordLists[KeywordIndex_Metamethod].InList(s)) {
					sc.ChangeState(SCE_LUA_METAMETHOD);
				} else {
					// not in any word list: decide from the character that follows
					// NOTE(review): GetLineNextChar presumably returns the next
					// non-blank character on this line; confirm in StyleContext.
					const int chNext = sc.GetLineNextChar();
					if (chNext == '(') {
						sc.ChangeState((kwType == KeywordType::Function) ? SCE_LUA_FUNCTION_DEFINITION : SCE_LUA_FUNCTION);
					} else if (kwType == KeywordType::Goto || (kwType == KeywordType::Colon && chNext == ':')) {
						sc.ChangeState(SCE_LUA_LABEL);
					} else if (kwType == KeywordType::Attribute && chNext == '>') {
						sc.ChangeState(SCE_LUA_ATTRIBUTE);
					}
				}
				// the pending context is dropped unless this word was itself a keyword
				if (sc.state != SCE_LUA_WORD) {
					kwType = KeywordType::None;
				}
				sc.SetState(SCE_LUA_DEFAULT);
			}
			break;

		case SCE_LUA_STRING_SQ:
		case SCE_LUA_STRING_DQ:
			if (sc.atLineStart) {
				// a quoted string only continues on a new line when the previous
				// line left a continuation or whitespace-skipping flag
				if (!(lineState & (LuaLineStateLineContinuation | LuaLineStateSkipwhitespace))) {
					sc.SetState(SCE_LUA_DEFAULT);
					break;
				}
				// the continuation flag is used up; keep only the "\z" flag
				lineState &= LuaLineStateSkipwhitespace;
			}
			// the first non-space character ends whitespace skipping
			if ((lineState & LuaLineStateSkipwhitespace) != 0 && !IsASpace(sc.ch)) {
				lineState = 0;
			}
			if (sc.ch == '\\') {
				if (IsEOLChar(sc.chNext)) {
					// backslash directly before the end of line
					lineState |= LuaLineStateLineContinuation;
				} else {
					if (sc.chNext == 'z') {
						lineState |= LuaLineStateSkipwhitespace;
					}
					escSeq.resetEscapeState(sc.state, sc.chNext);
					sc.SetState(SCE_LUA_ESCAPECHAR);
					// step onto the character after the backslash
					sc.Forward();
					if (sc.Match('u', '{')) {
						// braced form: also step over 'u' so the loop continues after '{'
						escSeq.brace = true;
						sc.Forward();
					}
				}
			} else if (sc.ch == ((sc.state == SCE_LUA_STRING_SQ) ? '\'' : '\"')) {
				// matching closing quote
				lineState = 0;
				sc.ForwardSetState(SCE_LUA_DEFAULT);
			} else if (HighlightFormatSpecifier(sc, insideUrl)) {
				// sc was already advanced past the specifier: look at the new position
				continue;
			}
			break;

		case SCE_LUA_ESCAPECHAR:
			if (escSeq.atEscapeEnd(sc.ch)) {
				// include the closing brace of \u{...} in the escape style
				if (escSeq.brace && sc.ch == '}') {
					sc.Forward();
				}
				sc.SetState(escSeq.outerState);
				// process the current character again in the restored string state
				continue;
			}
			break;

		case SCE_LUA_COMMENT:
		case SCE_LUA_LITERALSTRING:
			if (sc.ch == ']' && (sc.chNext == '=' || sc.chNext == ']')) {
				// a closing long bracket must carry the same number of '=' as the opening one
				if (IsLongBracket(styler, sc.currentPos, false, delimiterCount)) {
					// move onto the final ']' and leave the state just after it
					sc.Advance(1 + delimiterCount);
					sc.ForwardSetState(SCE_LUA_DEFAULT);
					delimiterCount = 0;
				}
			} else if (sc.state == SCE_LUA_LITERALSTRING && HighlightFormatSpecifier(sc, insideUrl)) {
				continue;
			}
			break;
		}

		// then decide whether a new token starts at this position
		if (sc.state == SCE_LUA_DEFAULT) {
			if (sc.ch == '\"') {
				lineState = 0;
				insideUrl = false;
				sc.SetState(SCE_LUA_STRING_DQ);
			} else if (sc.ch == '\'') {
				lineState = 0;
				insideUrl = false;
				sc.SetState(SCE_LUA_STRING_SQ);
			} else if (sc.ch == '[' && (sc.chNext == '=' || sc.chNext == '[')) {
				if (IsLongBracket(styler, sc.currentPos, true, delimiterCount)) {
					// long string: skip the whole opening bracket
					sc.SetState(SCE_LUA_LITERALSTRING);
					sc.Advance(2 + delimiterCount);
				} else {
					sc.SetState(SCE_LUA_OPERATOR);
				}
			} else if (sc.Match('-', '-')) {
				sc.SetState(SCE_LUA_COMMENTLINE);
				sc.Advance(2);
				// "--" directly followed by an opening long bracket is a block comment
				if (sc.ch == '[' && (sc.chNext == '=' || sc.chNext == '[')) {
					if (IsLongBracket(styler, sc.currentPos, true, delimiterCount)) {
						sc.ChangeState(SCE_LUA_COMMENT);
						sc.Advance(2 + delimiterCount);
					}
				}
				// flag lines that start with a line comment; the folder reads this flag
				if (sc.state == SCE_LUA_COMMENTLINE && visibleChars == 0) {
					lineState = LuaLineStateLineComment;
				}
			} else if (IsNumberStart(sc.ch, sc.chNext)) {
				sc.SetState(SCE_LUA_NUMBER);
			} else if (IsIdentifierStartEx(sc.ch)) {
				sc.SetState(SCE_LUA_IDENTIFIER);
			} else if (IsAGraphic(sc.ch)) {
				sc.SetState(SCE_LUA_OPERATOR);
				// '<' and "::" set the context for an attribute or label name
				kwType = KeywordType::None;
				if (sc.ch == '<') {
					kwType = KeywordType::Attribute;
				} else if (sc.Match(':', ':')) {
					kwType = KeywordType::Colon;
					sc.Forward();
				}
			}
		}

		if (visibleChars == 0 && !isspacechar(sc.ch)) {
			visibleChars++;
		}
		if (sc.atLineEnd) {
			// save the flags together with the open long bracket level,
			// then reset everything that is tracked per line
			styler.SetLineState(sc.currentLine, lineState | (delimiterCount << 8));
			lineState &= LuaLineStateLineContinuation | LuaLineStateSkipwhitespace;
			visibleChars = 0;
			kwType = KeywordType::None;
			insideUrl = false;
		}
		sc.Forward();
	}

	sc.Complete();
}

// Extracts the line comment flag from a saved line state:
// LuaLineStateLineComment when the flag is set, otherwise 0.
constexpr int GetLineCommentState(int lineState) noexcept {
	constexpr int commentMask = LuaLineStateLineComment;
	return lineState & commentMask;
}

// Computes fold levels for the range starting at startPos and spanning
// lengthDoc characters, using the styles and line states written by
// ColouriseLuaDoc. initStyle is the style before startPos; the keyword lists
// are not used. Each stored level holds the line's own level in the low bits
// and the level of the following line shifted left by 16.
void FoldLuaDoc(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, LexerWordList /*keywordLists*/, Accessor &styler) {
	const Sci_PositionU endPos = startPos + lengthDoc;
	Sci_Line lineCurrent = styler.GetLine(startPos);
	int levelCurrent = SC_FOLDLEVELBASE;
	int lineCommentPrev = 0;
	if (lineCurrent > 0) {
		// continue from the "next level" stored for the previous line
		levelCurrent = styler.LevelAt(lineCurrent - 1) >> 16;
		lineCommentPrev = GetLineCommentState(styler.GetLineState(lineCurrent - 1));
	}

	int levelNext = levelCurrent;
	int lineCommentCurrent = GetLineCommentState(styler.GetLineState(lineCurrent));
	Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1);
	// Never look for a line boundary beyond the requested range; otherwise
	// the last line of the range would not get its level stored when the
	// range ends before the next line starts.
	if (lineStartNext > endPos) {
		lineStartNext = endPos;
	}

	int styleNext = styler.StyleIndexAt(startPos);
	int style = initStyle;

	char word[10]; // function
	constexpr int MaxFoldWordLength = sizeof(word) - 1;
	int wordLen = 0;

	while (startPos < endPos) {
		const char ch = styler[startPos];
		const int stylePrev = style;
		style = styleNext;
		styleNext = styler.StyleIndexAt(++startPos);

		switch (style) {
		case SCE_LUA_WORD:
			// collect the keyword; characters beyond the buffer size are dropped
			if (wordLen < MaxFoldWordLength) {
				word[wordLen++] = ch;
			}
			if (styleNext != SCE_LUA_WORD) {
				word[wordLen] = '\0';
				wordLen = 0;
				// block openers raise the level, block closers lower it
				if (StrEqualsAny(word, "if", "do", "function", "repeat")) {
					levelNext++;
				} else if (StrEqualsAny(word, "end", "until")) {
					levelNext--;
				}
			}
			break;

		case SCE_LUA_OPERATOR:
			if (ch == '{' || ch == '[' || ch == '(') {
				levelNext++;
			} else if (ch == '}' || ch == ']' || ch == ')') {
				levelNext--;
			}
			break;

		case SCE_LUA_LITERALSTRING:
		case SCE_LUA_COMMENT:
			// fold from the first to the last character of a run in this style
			if (stylePrev != style) {
				levelNext++;
			} else if (styleNext != style) {
				levelNext--;
			}
			break;
		}

		if (startPos == lineStartNext) {
			const int lineCommentNext = GetLineCommentState(styler.GetLineState(lineCurrent + 1));
			levelNext = sci::max(levelNext, SC_FOLDLEVELBASE);
			// consecutive line comments fold together: +1 on the first line
			// of a run, -1 on the last, 0 in between or for a single line
			if (lineCommentCurrent) {
				levelNext += lineCommentNext - lineCommentPrev;
			}

			const int levelUse = levelCurrent;
			int lev = levelUse | (levelNext << 16);
			if (levelUse < levelNext) {
				lev |= SC_FOLDLEVELHEADERFLAG;
			}
			styler.SetLevel(lineCurrent, lev);

			lineCurrent++;
			lineStartNext = styler.LineStart(lineCurrent + 1);
			if (lineStartNext > endPos) {
				lineStartNext = endPos;
			}
			levelCurrent = levelNext;
			lineCommentPrev = lineCommentCurrent;
			lineCommentCurrent = lineCommentNext;
		}
	}
}

}

// Lexer module descriptor for Lua: ties SCLEX_LUA and the name "lua" to
// ColouriseLuaDoc and FoldLuaDoc.
extern const LexerModule lmLua(SCLEX_LUA, ColouriseLuaDoc, "lua", FoldLuaDoc);
